Immune predictions correlated with MultiPLIER latent variable expression

First, we get the MultiPLIER latent variable (LV) values from the nf-lv-viz repo and the deconvolution predictions from Synapse.

# Get the immune predictions: pull every row of the deconvolution score table
# from Synapse and relabel its `score` column as `immScore`.
dtab <- synapser::synTableQuery(
  paste('select * from', deconv_scores)
)$asDataFrame() %>%
  rename(immScore = 'score')
## 
Downloading  [####################]100.00%   1.1MB/1.1MB (2.0MB/s) Job-9807444193022174007049834.csv Done...
## Get latent variable scores.
# Download the latent variable results (feather format) from the nf-lv-viz
# repository into a temporary file.
fn <- tempfile(pattern = "", fileext = ".feather")
# Feather is a binary format: mode = "wb" stops download.file() from writing
# in text mode, which corrupts the file on Windows.
download.file(
  'https://github.com/Sage-Bionetworks/nf-lv-viz/raw/master/data/filt_nf_mp_res.feather',
  destfile = fn,
  mode = "wb"
)

# Keep rows whose sex is not the string "NA" and that are not flagged as cell
# lines. filter() also drops rows where either comparison evaluates to NA.
mp_res <- feather::read_feather(fn) %>%
  filter(sex != "NA", isCellLine != "TRUE")

# Leave out the predictions made by the xcell method.
dtab <- subset(dtab, method != 'xcell')

# Attach the latent variable values to every immune prediction, matching on
# specimen. A left join keeps predictions that have no latent variable data.
combined <- dtab %>%
  select(cell_type, method, specimenID, immScore) %>%
  left_join(mp_res, by = 'specimenID')

Then we can compute the correlation of each cell type measurement with each latent variable.

# For every (cell type, latent variable, method) combination, correlate the
# immune score with the latent variable value, using only complete pairs.
corVals <- combined %>%
  group_by(cell_type, latent_var, method) %>%
  summarize(
    corVal = cor(immScore, value, use = 'pairwise.complete.obs')
  )

# Show the correlations as an interactive table.
DT::datatable(corVals)

Plotting LV correlations

We are interested in particular protein signaling activity that might give rise to specific immune phenotypes.

## now how do we bracket them?
## Plot correlation distributions by cell type and method.
# library() stops immediately when ggplot2 is not installed; require() would
# only warn and return FALSE, deferring the failure to the ggplot() call.
library(ggplot2)
# One box per cell type and method, summarising the correlations across all
# latent variables; x labels are rotated so long cell type names stay legible.
p <- ggplot(corVals) +
  geom_boxplot(aes(x = cell_type, y = corVal, fill = method)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Correlation of MultiPLIER Latent Variables with predicted cell type")
print(p)

There are some latent variables that show up as highly correlated. By choosing a threshold, we can evaluate what they are in more detail.

# Minimum correlation for a latent variable to be examined in more detail.
corthresh <- 0.65

## Now reduce to the distinct cell type / method pairs that have at least one
## latent variable correlated above the threshold.
cor_cell_types <- corVals %>%
  ungroup() %>%
  filter(corVal > corthresh) %>%
  distinct(cell_type, method)
print(paste('we found', nrow(cor_cell_types), 'cell types with some latent variable correlation greater than', corthresh))
## [1] "we found 13 cell types with some latent variable correlation greater than 0.65"
# For every (cell type, method) pair that passed the threshold, scatter-plot
# the immune score against the values of its most correlated latent variables.
# Iterating over row indices (rather than apply() on the data frame) avoids
# coercing the table to a character matrix and copes with an empty table.
# NOTE(review): the list of plots returned here is auto-printed at top level
# in addition to the print(p2) call inside the function; wrap the call in
# invisible() if the second rendering is not wanted.
lapply(seq_len(nrow(cor_cell_types)), function(i) {
  ct <- as.character(cor_cell_types$cell_type[[i]])
  m <- as.character(cor_cell_types$method[[i]])

  # Upper bound on the number of latent variables drawn in one plot.
  max_lvs <- 12L

  # Latent variables above the threshold for this pair, strongest first.
  lvs <- corVals %>%
    ungroup() %>%
    filter(cell_type == ct, method == m, corVal > corthresh) %>%
    arrange(desc(corVal))

  if (nrow(lvs) > max_lvs) {
    lvs <- lvs[seq_len(max_lvs), ]
    # Report the weakest correlation that is actually plotted. The cutoff row
    # and the number of rows kept share max_lvs so they cannot drift apart
    # (the index used to be 15 while only 12 rows were kept, which yielded NA
    # or the correlation of a latent variable that is not shown).
    new.corthresh <- format(lvs$corVal[max_lvs], digits = 3)
  } else {
    new.corthresh <- corthresh
  }

  # Per-specimen values for the selected latent variables and this pair.
  scores <- combined %>%
    filter(latent_var %in% lvs$latent_var, cell_type == ct, method == m)

  p2 <- ggplot(scores) +
    geom_point(aes(x = immScore, y = value,
                   col = latent_var, shape = tumorType)) +
    scale_x_log10() +
    ggtitle(paste(m, 'predictions of', ct, 'correlation >', new.corthresh))
  print(p2)
  # ggsave(paste0(m,'predictions of',gsub(" ","",gsub("/","",ct)),'cor',new.corthresh,'.pdf'))
})

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

#parentid='syn20710537'
#for(fi in list.files('.')[grep('tions',list.files('.'))])
#  synapser::synStore(synapser::File(fi,parentId=parentid,annotations=list(resourceType='analysis',isMultiSpecimen='TRUE',isMultiIndividual='TRUE')),used=c(deconv_scores,metaviper_scores),executed=this.script)